library(scRNASeqHumanTrapnellMyoblast)
data("myoblastHumanFPKM")
pd <- pData(myoblastHumanFPKM)
eset <- exprs(myoblastHumanFPKM)
# remove control cells, debris cells, and wells with more than 1 cell
pd <- pd[pd$sampleType == "SC" & pd$control == "control well: FALSE" &
pd$debris == "debris: FALSE" &
pd$numcells == "cells in well: 1", ]
eset <- eset[, match(rownames(pd), colnames(eset))]
# calculate PDG
pd$PDG <- 1 - (apply(eset, 2, function(x){ length(which(x == 0)) }) / nrow(eset))
pdTrapnell <- pd
eTrapnell <- log2(eset + 1) # log transform FPKMs
# calculate standardized pearson contingency coefficient
t1 <- factor(pdTrapnell$hour)
t2 <- factor(paste(pdTrapnell$runID, pdTrapnell$fcID, sep = "_"))
nr <- length(levels(t1))
nc <- length(levels(t2))
assocstats(table(t1, t2))$cont / (((nr-1) / nr) * ((nc-1)/nc))^(1/4)
## [1] 1
library(scRNASeqMouseShalekDendritic)
data("dendriticMouseTPM")
eset <- exprs(dendriticMouseTPM)
pd <- pData(dendriticMouseTPM)
keepIDs <- (grepl(pattern = "BMDC \\(([0-9]+h)", pd$source_name_ch1) |
grepl(pattern = "(Unstimulation)", pd$source_name_ch1)) &
(!grepl(pattern = "IFN-B", pd$source_name_ch1) &
!grepl(pattern = "StimulationReplicate Experiment)", pd$source_name_ch1))
pd <- pd[keepIDs, ]
eset <- eset[, keepIDs]
pd$source_name_ch1 <- factor(pd$source_name_ch1)
pd$stim <- ifelse(!grepl(pattern = "(Unstimulation)", pd$source_name_ch1),
str_sub(unlist(lapply(str_split(pd$source_name_ch1, " ", n = 4),
function(x) x[4])), start = 1, end = -2),
str_sub(unlist(lapply(str_split(pd$source_name_ch1, " ", n = 4),
function(x) x[2])), start = 2, end = -2))
pd$time <- ifelse(grepl(pattern = "(Unstimulation)", pd$source_name_ch1), NA,
str_sub(unlist(lapply(str_split(pd$source_name_ch1, " ", n = 2),
function(x) x[2])), start = 2, end = -18))
pd$cond <- ifelse(grepl(pattern = "(Unstimulation)",
pd$source_name_ch1), NA,
str_sub(unlist(lapply(str_split(pd$source_name_ch1, " ", n = 2),
function(x) x[2])), start = 5, end = -13))
# calculate PDG
pd$PDG <- 1 - (apply(eset, 2, function(x){ length(which(x == 0)) }) / nrow(eset))
# Subset for only the LPS experimental condition
keepIDs <- grepl(pattern = "^LPS_([0-9]+h)_S", pd$title)
eShalek <- log2(eset[, keepIDs] + 1) # log transform FPKMs
pdShalek <- pd[keepIDs, ]
# calculate standardized pearson contingency coefficient
t1 <- factor(pdShalek$time)
t2 <- factor(paste(pdShalek$runID, pdShalek$fcLane, sep = "_"))
nr <- length(levels(t1))
nc <- length(levels(t2))
assocstats(table(t1, t2))$cont / (((nr-1) / nr) * ((nc-1)/nc))^(1/4)
## [1] 1
library(scRNASeqMouseTreutleinLineage)
data("lungMouseFPKM")
pd <- pData(lungMouseFPKM)
eset <- exprs(lungMouseFPKM)
# remove bulk and no cell
pd <- pd[pd$sampleType == "SC", ]
eset <- eset[, match(rownames(pd), colnames(eset))]
# calculate PDG
pd$PDG <- 1 - (apply(eset, 2, function(x){ length(which(x == 0)) }) / nrow(eset))
pdTreutlein <- pd
eTreutlein <- log2(eset + 1) # log transform FPKMs
levels(pdTreutlein$day) <- c(levels(pdTreutlein$day)[1:3], "Adult")
# calculate standardized pearson contingency coefficient
t1 <- pdTreutlein$day
t2 <- factor(paste(pdTreutlein$runID, pdTreutlein$fcID, sep = "_"))
nr <- length(levels(t1))
nc <- length(levels(t2))
assocstats(table(t1, t2))$cont / (((nr-1) / nr) * ((nc-1)/nc))^(1/4)
## [1] 0.9277192
library(scRNASeqMouseDengMonoAllelic)
data("embryoMouseRPKM")
eset <- exprs(embryoMouseRPKM)
pd <- pData(embryoMouseRPKM)
# calculate PDG
pd$PDG <- 1 - (apply(eset, 2, function(x){ length(which(x == 0)) }) / nrow(eset))
pdDeng <- pd
eDeng <- log2(eset + 1) # log transform RPKMs
# calculate standardized pearson contingency coefficient
t1 <- pdDeng$time
t2 <- factor(paste(pdDeng$runID, pdDeng$fcLane, sep = "_"))
nr <- length(levels(t1))
nc <- length(levels(t2))
assocstats(table(t1, t2))$cont / (((nr-1) / nr) * ((nc-1)/nc))^(1/4)
## [1] 0.9661321
library(scRNASeqHumanPatelGlioblastoma)
data("glioHumanCount")
eset <- assay(glioHumanCount)
pd <- colData(glioHumanCount)
PDG <- 1 - (apply(eset, 2, function(x){ length(which(x == 0)) }) / nrow(eset))
data("glioHumanTPM")
eset <- exprs(glioHumanTPM)
pd <- pData(glioHumanTPM)
pd$PDG <- PDG[match(rownames(pd), names(PDG))]
ePatel<- eset[, pd$sampleType == "SC"]
pdPatel <- pd[pd$sampleType == "SC", ]
# calculate standardized pearson contingency coefficient
t1 <- pdPatel$tumorName
t2 <- factor(paste(pdPatel$runID, pdPatel$fcLane, sep = "_"))
nr <- length(levels(t1))
nc <- length(levels(t2))
assocstats(table(t1, t2))$cont / (((nr-1) / nr) * ((nc-1)/nc))^(1/4)
## [1] 0.9898464
##### Figure 2A
dat <- sweep(ePatel, 1, rowMeans(ePatel), FUN = "-")
s <- svd(ePatel)
scores <- data.frame(pdPatel, s$v[, 1:5])
pc1Var <- c(s$d^2 / sum(s$d^2))[1]
PC1.Patel <- ggplot(scores, aes(x = PDG, y = X1)) +
geom_point(size = 2, color = "blue") +
ylab(paste0("Principal Component 1 (", round(pc1Var,2)*100, "%)")) +
xlab("Proportion of detected genes") +
labs(title = paste0("Patel et al., 2014 (corr: ",
round(cor(scores$X1, scores$PDG), 2), ")")) +
theme(plot.title = element_text(size = 20),
axis.text = element_text(size = 20),
axis.title = element_text(size = 20))
##### Figure 2B
dat <- sweep(eTrapnell, 1, rowMeans(eTrapnell), FUN = "-")
s <- svd(dat)
scores <- data.frame(pdTrapnell, s$v[, 1:5],
"Batch" = factor(paste0(pdTrapnell$runID, pdTrapnell$fcLane)))
pc1Var <- c(s$d^2 / sum(s$d^2))[1]
scores$X1 <- scores$X1*(-1)
PC1.Trapnell <- ggplot(scores, aes(x = PDG, y = X1)) +
geom_point(size = 2, color = "blue") +
ylab(paste0("Principal Component 1 (", round(pc1Var,2)*100, "%)")) +
xlab("Proportion of detected genes") +
labs(title = paste0("Trapnell et al., 2014 (corr: ",
round(cor(scores$X1, scores$PDG), 2), ")")) +
theme(plot.title = element_text(size = 20),
axis.text = element_text(size = 20),
axis.title = element_text(size = 20))
##### Figure 2C
dat <- sweep(eShalek, 1, rowMeans(eShalek), FUN = "-")
s <- svd(dat)
scores <- data.frame(pdShalek, s$v[, 1:5],
"Batch" = factor(paste0(pdShalek$runID, pdShalek$fcLane)))
pc1Var <- c(s$d^2 / sum(s$d^2))[1]
PC1.Shalek <- ggplot(scores, aes(x = PDG, y = X1)) +
geom_point(size = 2, color = "blue") +
ylab(paste0("Principal Component 1 (", round(pc1Var,2)*100, "%)")) +
xlab("Proportion of detected genes") +
labs(title = paste0("Shalek et al., 2014 (corr: ",
round(cor(scores$X1, scores$PDG), 2), ")")) +
theme(plot.title = element_text(size = 20),
axis.text = element_text(size = 20),
axis.title = element_text(size = 20))
##### Figure 2D
dat <- sweep(eTreutlein, 1, rowMeans(eTreutlein), FUN = "-")
s <- svd(dat)
scores <- data.frame(pdTreutlein, s$v[, 1:5])
pc1Var <- c(s$d^2 / sum(s$d^2))[1]
PC1.Treutlein <- ggplot(scores, aes(x = PDG, y = X1)) +
geom_point(size = 2, color = "blue") +
ylab(paste0("Principal Component 1 (", round(pc1Var,2)*100, "%)")) +
xlab("Proportion of detected genes") +
labs(title = paste0("Treutlein et al., 2014 (corr: ",
round(cor(scores$X1, scores$PDG), 2), ")")) +
theme(plot.title = element_text(size = 20),
axis.text = element_text(size = 20),
axis.title = element_text(size = 20))
##### Figure 2E and 2F
zz <- levels(pdDeng$time)
pdDeng$bin <- ifelse(pdDeng$time %in% zz[1:2], "Bin A",
ifelse(pdDeng$time %in% zz[3:4], "Bin B",
ifelse(pdDeng$time %in% zz[5:7], "Bin C", "Bin D")))
dat <- sweep(eDeng[, pdDeng$bin == "Bin C"], 1, rowMeans(eDeng[, pdDeng$bin == "Bin C"]), FUN = "-")
s <- svd(dat)
pcVar <- c(s$d^2 / sum(s$d^2))[1:3]
scores <- data.frame(pdDeng[pdDeng$bin == "Bin C", ], s$v[, 1:3])
PC1.Deng.E <- ggplot(scores, aes(x = PDG, y = X1)) +
geom_point(size = 2, color = "blue") +
ylab(paste0("Principal Component 1 (", round(pc1Var,2)*100, "%)")) +
xlab("Proportion of detected genes") +
labs(title = paste0("Deng et al., 2014 (Group C) (corr: ",
round(cor(scores$X1, scores$PDG), 2), ")")) +
theme(plot.title = element_text(size = 20),
axis.text = element_text(size = 20),
axis.title = element_text(size = 20))
dat <- sweep(eDeng[, pdDeng$bin %in% c("Bin D")], 1,
rowMeans(eDeng[, pdDeng$bin == c("Bin D")]), FUN = "-")
s <- svd(dat)
pcVar <- c(s$d^2 / sum(s$d^2))[1:3]
scores <- data.frame(pdDeng[pdDeng$bin %in% c("Bin D"), ], s$v[, 1:3])
PC1.Deng.F <- ggplot(scores, aes(x = PDG, y = X1)) +
geom_point(size = 2, color = "blue") +
ylab(paste0("Principal Component 1 (", round(pc1Var,2)*100, "%)")) +
xlab("Proportion of detected genes") +
labs(title = paste0("Deng et al., 2014 (Group D) (corr: ",
round(cor(scores$X1, scores$PDG), 2), ")")) +
theme(plot.title = element_text(size = 20),
axis.text = element_text(size = 20),
axis.title = element_text(size = 20))
p1 <- plot_grid(PC1.Patel, PC1.Trapnell, PC1.Shalek, PC1.Treutlein, PC1.Deng.E, PC1.Deng.F,
labels = c("A", "B", "C", "D", "E", "F"), label_size = 25, ncol = 3)
p1
# pdf(paste0(working_path, "scBatchFig2.pdf"), width = 18, height = 12)
# print(p1)
# dev.off()
colTum <- brewer.pal(8, "Dark2")[c(1:5)]
colRun <- brewer.pal(9, "Set1")[c(3,5,4, 8, 1:2)]
Tumor = factor(pdPatel$tumorName)
levels(Tumor) <- c("Tumor 5", paste0("Tumor ", 1:4))
Tumor = factor(Tumor,levels(Tumor)[c(2:5, 1)])
Batch = factor(pdPatel$runID)
levels(Batch) <- c("Batch 5", "Batch 2", "Batch 3", "Batch 6", "Batch 1", "Batch 4")
Batch = factor(Batch,levels(Batch)[c(5, 2, 3, 6, 1, 4)])
PDG = pdPatel$PDG
s <- svd(ePatel - rowMeans(ePatel))
pcVar <- c(s$d^2 / sum(s$d^2))[1:3]
scores <- data.frame(Tumor, Batch, PDG, s$v[,1:3])
PC1.2.byTumor <- ggplot(scores, aes(x = X1, y = X2, colour = Tumor)) +
geom_point(size = 4) +
scale_colour_manual(values = colTum, name="Biological\nGroup") +
xlab(paste0("Principal Component 1 (", round(pcVar[1],2)*100, "%)")) +
ylab(paste0("Principal Component 2 (", round(pcVar[2],2)*100, "%)"))
PC1.2.byBatch <- ggplot(scores, aes(x = X1, y = X2, colour = Batch)) +
geom_point(size = 4) + scale_colour_manual(values = colRun) +
xlab(paste0("Principal Component 1 (", round(pcVar[1],2)*100, "%)")) +
ylab(paste0("Principal Component 2 (", round(pcVar[2],2)*100, "%)"))
Batch = factor(Batch, levels = names(sort(unlist(lapply( split(PDG, Batch), median)),
decreasing = FALSE)))
dat = data.frame("Batch" = Batch, "Fraction" = PDG)
p.boxplot <- ggplot(dat, aes(Batch, Fraction, fill = Batch)) + geom_boxplot() +
xlab(" ") + ylab(" ") +
scale_fill_manual(values = colRun[factor(levels(Batch))],
breaks=paste0("Batch ", 1:6)) +
labs(title = "Proportion of detected genes") +
theme(axis.text.x = element_blank())
subIDs <- (pdPatel$tumorName == "MGH26")
BatchSub = factor(pdPatel$runID[subIDs])
levels(BatchSub) <- c("Batch 5", "Batch 6")
PDGSub <- PDG[subIDs]
s <- svd(ePatel[,subIDs] - rowMeans(ePatel[,subIDs]))
pcVar <- c(s$d^2 / sum(s$d^2))[1:3]
scores.sub <- data.frame(BatchSub, PDGSub, s$v[,1:3])
PC1.2.tumMGH26 <- ggplot(scores.sub, aes(x = X1, y = X2, colour = BatchSub)) +
geom_point(size = 4) +
xlab(paste0("Principal Component 1 (", round(pcVar[1],2)*100, "%)")) +
ylab(paste0("Principal Component 2 (", round(pcVar[2],2)*100, "%)")) +
scale_colour_manual(values = colRun[c(5,6)]) +
labs(title = "Only Biological Group 5")
p1 <- plot_grid(PC1.2.byTumor, PC1.2.byBatch, PC1.2.tumMGH26,
p.boxplot, cols = 2,
labels = c("A", "B", "C", "D"), label_size = 20)
p1
# pdf(paste0(working_path, "scBatchFig3.pdf"), width = 12, height = 8)
# print(p1)
# dev.off()
pout <- ggplot(scores.sub, aes(x = BatchSub, y = PDGSub, fill = BatchSub)) +
geom_boxplot() +
ylab("Proportion of detected genes") +
scale_fill_manual(values = colRun[c(5,6)]) +
labs(title = "Patel et al. (2014) - Cells from the same\nbiological group (Tumor 5) processed in two batches") +
theme(plot.title = element_text(size = 20),
axis.text = element_text(size = 20),
axis.title = element_text(size = 20),
legend.text = element_text(size = 15),
legend.title = element_text(size = 15))
pout
# pdf(paste0(working_path, "scBatchSuppFig-2.pdf"), width = 12, height = 10)
# print(p1)
# dev.off()
zz <- levels(pdDeng$time)
pdDeng$bin <- ifelse(pdDeng$time %in% zz[1:2], "Bin A",
ifelse(pdDeng$time %in% zz[3:4], "Bin B",
ifelse(pdDeng$time %in% zz[5:7], "Bin C", "Bin D")))
dat = data.frame("Proportion" = c(pdTrapnell$PDG, pdShalek$PDG,
pdTreutlein$PDG, pdDeng$PDG[pdDeng$bin == "Bin C"],
pdDeng$PDG[pdDeng$bin == "Bin D"]),
"Median" = c(apply(eTrapnell, 2, function(z){ median(z[z>0])}),
apply(eShalek, 2, function(z){ median(z[z>0])}),
apply(eTreutlein, 2, function(z){ median(z[z>0])}),
apply(eDeng[, pdDeng$bin == "Bin C"], 2, function(z){ median(z[z>0])}),
apply(eDeng[, pdDeng$bin == "Bin D"], 2, function(z){ median(z[z>0])})),
"Study" = factor(c(rep("Trapnell", nrow(pdTrapnell)),
rep("Shalek", nrow(pdShalek)),
rep("Treutlein", nrow(pdTreutlein)),
rep("Deng C", nrow(pdDeng[pdDeng$bin == "Bin C", ])),
rep("Deng D", nrow(pdDeng[pdDeng$bin == "Bin D", ]))),
levels = c("Trapnell", "Shalek", "Treutlein",
"Deng C", "Deng D")))
p.Trapnell <- dat %>% filter(Study == "Trapnell") %>% ggplot(aes(x = Proportion, y = Median)) +
geom_point() +
stat_smooth(se = FALSE, degree = 1, span = 0.75, size = 2) +
xlab("Proportion of detected genes") +
ylab("Median expression (log2)") +
labs(title = "Trapnell et al., 2014") +
theme(plot.title = element_text(size =20),
axis.text = element_text(size = 20),
axis.title = element_text(size =20))
p.Shalek <- dat %>% filter(Study == "Shalek") %>% ggplot(aes(x = Proportion, y = Median)) +
geom_point() +
stat_smooth(se = FALSE, degree = 1, span = 0.75, size = 2) +
xlab("Proportion of detected genes") +
ylab("Median expression (log2)") +
labs(title = "Shalek et al., 2014") +
theme(plot.title = element_text(size =20),
axis.text = element_text(size = 20),
axis.title = element_text(size =20))
p.Treutlein <- dat %>% filter(Study == "Treutlein") %>% ggplot(aes(x = Proportion, y = Median)) +
geom_point() +
stat_smooth(se = FALSE, degree = 1, span = 0.75, size = 2) +
xlab("Proportion of detected genes") +
ylab("Median expression (log2)") +
labs(title = "Treutlein et al., 2014") +
theme(plot.title = element_text(size =20),
axis.text = element_text(size = 20),
axis.title = element_text(size =20))
p.Deng.C <- dat %>% filter(Study == "Deng C") %>% ggplot(aes(x = Proportion, y = Median)) +
geom_point() +
stat_smooth(se = FALSE, degree = 1, span = 0.75, size = 2) +
xlab("Proportion of detected genes") +
ylab("Median expression (log2)") +
labs(title = "Deng et al., 2014 (Group C)") +
theme(plot.title = element_text(size =20),
axis.text = element_text(size = 20),
axis.title = element_text(size =20))
p.Deng.D <- dat %>% filter(Study == "Deng D") %>% ggplot(aes(x = Proportion, y = Median)) +
geom_point() +
stat_smooth(se = FALSE, degree = 1, span = 0.75, size = 2) +
xlab("Proportion of detected genes") +
ylab("Median expression (log2)") +
labs(title = "Deng et al., 2014 (Group D)") +
theme(plot.title = element_text(size =20),
axis.text = element_text(size = 20),
axis.title = element_text(size =20))
p1 <- plot_grid(p.Trapnell, p.Shalek, p.Treutlein, p.Deng.C, p.Deng.D, ncol = 3,
labels = LETTERS[1:5], label_size = 25)
p1
# pdf(paste0(working_path, "scBatchFig4.pdf"), width = 16, height = 10)
# print(p1)
# dev.off()
##### Figure 1A
zz <- levels(pdDeng$time)
pdDeng$bin <- ifelse(pdDeng$time %in% zz[1:2], "Bin A",
ifelse(pdDeng$time %in% zz[3:4], "Bin B",
ifelse(pdDeng$time %in% zz[5:7], "Bin C", "Bin D")))
dat <- sweep(eDeng[, pdDeng$bin == "Bin A"], 1, rowMeans(eDeng[, pdDeng$bin == "Bin A"]), FUN = "-")
s <- svd(dat)
pcVar <- c(s$d^2 / sum(s$d^2))[1:3]
scores <- data.frame(pdDeng[pdDeng$bin == "Bin A", ], s$v[, 1:3])
PC1.Deng.A <- ggplot(scores, aes(x = PDG, y = X1)) +
geom_point(size = 3, color = "blue") +
ylab(paste0("Principal Component 1 (", round(pcVar[1],2)*100, "%)")) +
xlab("Proportion of detected genes") +
labs(title = paste0("Deng et al., 2014 (Group A) (corr: ",
round(cor(scores$X1, scores$PDG), 2), ")")) +
theme(plot.title = element_text(size = 20),
axis.text = element_text(size = 20),
axis.title = element_text(size = 20))
dat <- sweep(eDeng[, pdDeng$bin %in% c("Bin B")], 1,
rowMeans(eDeng[, pdDeng$bin == c("Bin B")]), FUN = "-")
s <- svd(dat)
pcVar <- c(s$d^2 / sum(s$d^2))[1:3]
scores <- data.frame(pdDeng[pdDeng$bin %in% c("Bin B"), ], s$v[, 1:3])
PC1.Deng.B <- ggplot(scores, aes(x = PDG, y = X1)) +
geom_point(size = 3, color = "blue") +
ylab(paste0("Principal Component 1 (", round(pcVar[1],2)*100, "%)")) +
xlab("Proportion of detected genes") +
labs(title = paste0("Deng et al., 2014 (Group B) (corr: ",
round(cor(scores$X1, scores$PDG), 2), ")")) +
theme(plot.title = element_text(size = 20),
axis.text = element_text(size = 20),
axis.title = element_text(size = 20))
p1 <- plot_grid(PC1.Deng.A, PC1.Deng.B, labels = LETTERS[1:2],
label_size = 20, ncol = 1)
p1
# pdf(paste0(working_path, "scBatchSuppFig-1.pdf"), width = 10, height = 15)
# print(p1)
# dev.off()
colTum <- brewer.pal(8, "Dark2")
colRun <- brewer.pal(9, "Set1")[c(1:5, 8, 9)]
scores <- data.frame(pdTreutlein, "Fraction" = pdTreutlein$PDG,
"Batch" = paste(pdTreutlein$runID, pdTreutlein$fcLane, sep = "_"),
s$v[, 1:5])
levels(scores$Batch) <- c("Batch 4", "Batch 5", "Batch 6",
"Batch 1", "Batch 2", "Batch 7", "Batch 3")
scores$Batch <- factor(scores$Batch, levels = levels(scores$Batch)[c(4,5,7,1:3,6)])
scores = scores %>% filter(day %in% "ED18.5")
p.box.batch <- ggplot(scores, aes(x = Batch, y = Fraction, fill = Batch)) +
geom_boxplot() + xlab(" ") + ylab(" ") +
labs(title = "Treutlein et al. (2014) - Cells from the same \nbiological group (ED18.5) processed in three batches") +
ylab("Proportion of detected genes") + # ylim(0.05, 0.25) +
scale_fill_manual(values = colRun) +
theme(plot.title = element_text(size = 20),
axis.text = element_text(size = 20),
axis.title = element_text(size = 20),
legend.text = element_text(size = 15),
legend.title = element_text(size = 15))
p.box.batch
# pdf(paste0(working_path, "scBatchSuppFig-3.pdf"), width = 12, height = 10)
# print(p.box.batch)
# dev.off()
gg_color_hue <- function(n) {
hues = seq(15, 375, length=n+1)
hcl(h=hues, l=65, c=100)[1:n]
}
zz <- levels(pdDeng$time)
pdDeng$bin <- ifelse(pdDeng$time %in% zz[1:2], "Bin A",
ifelse(pdDeng$time %in% zz[3:4], "Bin B",
ifelse(pdDeng$time %in% zz[5:7], "Bin C", "Bin D")))
scores <- data.frame(pdDeng, s$v[, 1:3])
box.A.left <- scores %>% filter(bin == "Bin A") %>% ggplot(aes(x = time, y = PDG, fill = runID)) +
geom_boxplot() + xlab(" ") + ylab(" ") +
labs(title = "Deng et al. (2014) - Group A") +
ylab("Proportion of detected genes") +
theme(plot.title = element_text(size = 20),
axis.text = element_text(size = 20),
axis.title = element_text(size = 20),
legend.text = element_text(size = 15),
legend.title = element_text(size = 15)) +
geom_vline(xintercept=c(1.5))
box.A.right <- scores %>% filter(bin == "Bin A") %>%
ggplot(aes(x = runID, y = PDG, fill = time)) +
geom_boxplot() + xlab(" ") + ylab(" ") +
labs(title = "Deng et al. (2014) - Group A") +
ylab("Proportion of detected genes") +
theme(plot.title = element_text(size = 20),
axis.text = element_text(size = 20),
axis.title = element_text(size = 20),
legend.text = element_text(size = 15),
legend.title = element_text(size = 15)) +
geom_vline(xintercept=c(1.5))
sub1 <- scores %>% filter(bin == "Bin B")
sub <- scores %>% filter(bin == "Bin B", time == "Late 2-cell", runID != "Run0084")
cols = gg_color_hue(4)
box.B.left <- ggplot(sub1, aes(x = time, y = PDG)) +
geom_boxplot(aes(fill = runID)) + xlab(" ") + ylab(" ") +
labs(title = "Deng et al. (2014) - Group B") +
ylab("Proportion of detected genes") +
theme(plot.title = element_text(size = 20),
axis.text = element_text(size = 20),
axis.title = element_text(size = 20),
legend.text = element_text(size = 15),
legend.title = element_text(size = 15)) +
geom_vline(xintercept=c(1.5)) +
geom_point(aes(x = 1.75, y = sub[2,]$PDG), color = cols[2], size = 5) +
geom_point(aes(x = 2.25, y = sub[1,]$PDG), color = cols[4], size = 5)
sub1 <- scores %>% filter(bin == "Bin B")
sub <- scores %>% filter(bin == "Bin B", time == "Late 2-cell", runID %in% c("Run0083", "Run0085"))
cols = gg_color_hue(2)
box.B.right <- ggplot(sub1, aes(x = runID, y = PDG)) +
geom_boxplot(aes(fill = time)) + xlab(" ") + ylab(" ") +
labs(title = "Deng et al. (2014) - Group B") +
ylab("Proportion of detected genes") +
theme(plot.title = element_text(size = 20),
axis.text = element_text(size = 20),
axis.title = element_text(size = 20),
legend.text = element_text(size = 15),
legend.title = element_text(size = 15)) +
geom_vline(xintercept=c(1.5, 2.5, 3.5)) +
geom_point(aes(x = 2.20, y = sub[2,]$PDG), color = cols[2], size = 5) +
geom_point(aes(x = 4, y = sub[1,]$PDG), color = cols[2], size = 5)
box.C.left <- scores %>% filter(bin == "Bin C") %>%
ggplot(aes(x = time, y = PDG, fill = runID)) +
geom_boxplot() + xlab(" ") + ylab(" ") +
labs(title = "Deng et al. (2014) - Group C") +
ylab("Proportion of detected genes") +
theme(plot.title = element_text(size = 20),
axis.text = element_text(size = 20),
axis.title = element_text(size = 20),
legend.text = element_text(size = 15),
legend.title = element_text(size = 15)) + ylim(0.30, 0.45) +
geom_vline(xintercept=c(1.5, 2.5))
box.C.right <- scores %>% filter(bin == "Bin C") %>%
ggplot(aes(x = runID, y = PDG, fill = time)) +
geom_boxplot() + xlab(" ") + ylab(" ") +
labs(title = "Deng et al. (2014) - Group C") +
ylab("Proportion of detected genes") +
theme(plot.title = element_text(size = 20),
axis.text = element_text(size = 20),
axis.title = element_text(size = 20),
axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
legend.text = element_text(size = 15),
legend.title = element_text(size = 15)) + ylim(0.30, 0.45) +
geom_vline(xintercept=c(1.5, 2.5, 3.5, 4.5, 5.5, 6.5))
box.D.left <- scores %>% filter(bin == "Bin D") %>%
ggplot(aes(x = time, y = PDG, fill = runID)) +
geom_boxplot() + xlab(" ") + ylab(" ") +
labs(title = "Deng et al. (2014) - Group D") +
ylab("Proportion of detected genes") +
theme(plot.title = element_text(size = 20),
axis.text = element_text(size = 20),
axis.title = element_text(size = 20),
legend.text = element_text(size = 15),
legend.title = element_text(size = 15)) + ylim(0.25, 0.45) +
geom_vline(xintercept=c(1.5, 2.5))
box.D.right <- scores %>% filter(bin == "Bin D") %>%
ggplot(aes(x = runID, y = PDG, fill = time)) +
geom_boxplot() + xlab(" ") + ylab(" ") +
labs(title = "Deng et al. (2014) - Group D") +
ylab("Proportion of detected genes") +
theme(plot.title = element_text(size = 20),
axis.text = element_text(size = 20),
axis.title = element_text(size = 20),
axis.text.x = element_text(size = 15),
legend.text = element_text(size = 15),
legend.title = element_text(size = 15)) + ylim(0.25, 0.45) +
geom_vline(xintercept=c(1.5, 2.5, 3.5))
p1 <- plot_grid(box.A.left, box.A.right,
box.B.left, box.B.right,
box.C.left, box.C.right,
box.D.left, box.D.right, ncol = 2,
labels =LETTERS[1:8], label_size = 20)
p1
# pdf(paste0(working_path, "scBatchSuppFig-4.pdf"), width = 18, height = 22)
# print(p1)
# dev.off()
colTum <- brewer.pal(8, "Dark2")
colRun <- brewer.pal(9, "Set1")
scores <- data.frame(pdTrapnell, "Fraction" = pdTrapnell$PDG,
"Batch" = paste(pdTrapnell$runID, pdTrapnell$fcLane, sep = "_"))
levels(scores$Batch) <- c("Batch 2 (24h)", "Batch 1 (0h)",
"Batch 3 (48h)", "Batch 4 (72h)")
scores$Batch <- factor(scores$Batch, levels = levels(scores$Batch)[c(2,1,3,4)])
scores$hour <- factor(scores$hour, levels = levels(scores$hour)[c(2,3,4,1)])
levels(scores$hour) <- (c("0h", "24h", "48h", "72h"))
p.box.Trapnell <- ggplot(scores, aes(x = hour, y = Fraction, fill = Batch)) +
geom_boxplot() + xlab(" ") + ylab(" ") +
labs(title = "Trapnell et al. (2014) - Biological groups\n completely confounded with processed batches of cells") +
ylab("Proportion of detected genes") +
scale_fill_manual(values = colRun) +
theme(plot.title = element_text(size = 20),
axis.text = element_text(size = 20),
axis.title = element_text(size = 20),
legend.text = element_text(size = 15),
legend.title = element_text(size = 15))
scores <- data.frame(pdShalek, "Fraction" = pdShalek$PDG,
"Batch" = paste(pdShalek$runID, pdShalek$fcLane, sep = "_"))
levels(scores$Batch) <- c("Batch 1 (1h)", "Batch 2 (2h)",
"Batch 3 (4h)", "Batch 4 (6h)")
p.box.Shalek <- ggplot(scores, aes(x = time, y = Fraction, fill = Batch)) +
geom_boxplot() + xlab(" ") + ylab(" ") +
labs(title = "Shalek et al. (2014) - Biological groups\n completely confounded with processed batches of cells") +
ylab("Proportion of detected genes") +
scale_fill_manual(values = colRun) +
theme(plot.title = element_text(size = 20),
axis.text = element_text(size = 20),
axis.title = element_text(size = 20),
legend.text = element_text(size = 15),
legend.title = element_text(size = 15))
p1 <- plot_grid(p.box.Trapnell, p.box.Shalek,
ncol = 1, labels = LETTERS[1:2], label_size = 20)
p1
# pdf(paste0(working_path, "scBatchSuppFig-5.pdf"), width = 10, height = 15)
# print(p1)
# dev.off()
##### Figure 6A: Patel et al. (2014)
library(scRNASeqHumanPatelGlioblastoma)
data("glioHumanCount")
eset <- assay(glioHumanCount)
pd <- as.data.frame(colData(glioHumanCount))
pd$tumorName <- as.factor(as.character(pd$tumorName))
pd$RPM <- colSums(eset)/1e6
pd$PDG <- 1 - (apply(eset, 2, function(x){ length(which(x == 0)) }) / nrow(eset))
subIDs <- (pd$sampleType == "SC" & pd$includeSample == TRUE)
pdPatelCounts <- pd[subIDs,]
esetPatel <- eset[, subIDs]
# raw
lcountsPatel <- log2(esetPatel + 1)
# Figure 6B: Trapnell et al., 2014
library(scRNASeqHumanTrapnellMyoblast)
data("myoblastHumanCount")
eset <- assay(myoblastHumanCount)
pd <- as.data.frame(colData(myoblastHumanCount))
# # remove control cells, debris cells, and wells with more than 1 cell
pd <- pd[pd$characteristics_ch1.3 == "control well: FALSE" &
pd$characteristics_ch1.2 == "debris: FALSE" &
pd$characteristics_ch1.4 == "cells in well: 1", ]
eset <- eset[, match(rownames(pd), colnames(eset))]
# calculate PDG
pd$PDG <- 1 - (apply(eset, 2, function(x){ length(which(x == 0)) }) / nrow(eset))
pd$RPM <- colSums(eset) / 1e6
esetSubTrapnell <- eset
pdTrapnellCounts <- pd
# raw
lcountsTrapnell <- log2(esetSubTrapnell + 1)
##### Figure 6C: Shalek et al., 2014
library(scRNASeqMouseShalekDendritic)
data("dendriticMouseCount")
eset <- assay(dendriticMouseCount)
pd <- as.data.frame(colData(dendriticMouseCount))
eset <- eset[, pd$rep == FALSE & pd$onChip == FALSE ]
pd <- pd[pd$rep == FALSE & pd$onChip == FALSE, ]
eset <- eset[, match( colnames(eShalek), rownames(pd))]
pd <- pd[match( colnames(eShalek), rownames(pd)), ]
pd$RPM <- colSums(eset)/1e6
pd$PDG <- 1 - (apply(eset, 2, function(x){ length(which(x == 0)) }) / nrow(eset))
esetSubShalek <- eset
pdShalekCounts <- pd
# raw
lcountsShalek <- log2(esetSubShalek + 1)
# Figure 6D: Treutlein et al., 2014
library(scRNASeqMouseTreutleinLineage)
data("lungMouseCount")
eset <- assay(lungMouseCount)
pd <- as.data.frame(colData(lungMouseCount))
pd$PDG <- 1 - (apply(eset, 2, function(x){ length(which(x == 0)) }) / nrow(eset))
pd$RPM <- colSums(eset) / 1e6
levels(pd$characteristics_ch1.2) <- list("ED14.5" = "age: Embryonic day 14.5",
"ED16.5" = "age: Embryonic day 16.5",
"ED18.5" ="age: Embryonic day 18.5", "PND107" = "age: post natal day 107")
pd$day <- pd$characteristics_ch1.2
pd$sampleType <- factor(ifelse(grepl("bulk", pd$description), "bulk",
ifelse(grepl("no cell control", pd$description),
"nocell", "SC")), levels=c("SC", "bulk", "nocell"))
eset <- eset[, pd$sampleType == "SC"]
pd <- pd[pd$sampleType == "SC", ]
esetSubTreutlein <- eset
pdTreutleinCounts <- pd
# raw
lcountsTreutlein <- log2(esetSubTreutlein + 1)
# Figure 6E: Deng et al., 2014
library(scRNASeqMouseDengMonoAllelic)
data("embryoMouseCount")
eset <- assay(embryoMouseCount)
pd <- as.data.frame(colData(embryoMouseCount))
# remove liver and fibroblast cells
keepCells <- !grepl("Liver", pd$source_name_ch1) &
!grepl("fibroblast", pd$source_name_ch1) &
!grepl("strain: C57BL/6J", pd$characteristics_ch1)
eset <- eset[, keepCells]
pd <- pd[keepCells, ]
pd$PDG <- 1 - (apply(eset, 2, function(x){ length(which(x == 0)) }) / nrow(eset))
pd$RPM <- colSums(eset) / 1e6
esetSubDeng <- eset
pdDengCounts <- pd
# raw
lcountsDeng <- log2(esetSubDeng + 1)
x <- data.frame("Proportion" = c(colMeans(esetPatel>2), colMeans(esetSubTrapnell>2),
colMeans(esetSubShalek>2), colMeans(esetSubTreutlein>2),
colMeans(esetSubDeng>2)),
"Median" = c(apply(esetPatel,2,function(x) median(x[x>2])),
apply(esetSubTrapnell,2,function(x) median(x[x>2])),
apply(esetSubShalek,2,function(x) median(x[x>2])),
apply(esetSubTreutlein,2,function(x) median(x[x>2])),
apply(esetSubDeng,2,function(x) median(x[x>2]))),
"Study" = c(rep("Patel", ncol(esetPatel)),
rep("Trapnell", ncol(esetSubTrapnell)),
rep("Shalek", ncol(esetSubShalek)),
rep("Treutlein", ncol(esetSubTreutlein)),
rep("Deng", ncol(esetSubDeng))))
x$Study <- factor(x$Study, levels = c("Patel", "Trapnell", "Shalek",
"Treutlein", "Deng"))
p1 <- ggplot(x, aes(x = Proportion, y = Median)) + geom_point(size = 2) +
facet_wrap(~Study, ncol = 2, scales= "free") +
geom_vline(aes(xintercept = 0.05), colour = "red") +
xlab("Proportion of detected genes") +
ylab("Median expression") +
theme(plot.title = element_text(size = 15),
axis.text = element_text(size = 15),
axis.title = element_text(size = 15))
p1
# pdf(paste0(working_path, "scBatchSuppFig-6.pdf"), width = 12, height = 15)
# print(p1)
# dev.off()
##### Figure 7A: Patel et al. (2014)
library(scRNASeqHumanPatelGlioblastoma)
data("glioHumanCount")
eset <- assay(glioHumanCount)
pd <- as.data.frame(colData(glioHumanCount))
pd$tumorName <- as.factor(as.character(pd$tumorName))
pd$RPM <- colSums(eset)/1e6
pd$PDG <- 1 - (apply(eset, 2, function(x){ length(which(x == 0)) }) / nrow(eset))
subIDs <- (pd$sampleType == "SC" & pd$includeSample == TRUE)
pdSub <- pd[subIDs,]
esetSub <- eset[, subIDs]
esetSub <- esetSub[,order(pdSub$PDG)]
pdSub <- pdSub[order(pdSub$PDG), ]
# raw
lcounts <- log2(esetSub + 1)
# adjusting for library size
tmp = pdSub$RPM
rpm <- sweep(esetSub, 2, tmp, FUN = "/")
lrpm <- log2(rpm + 1)
pdSub$keepme2 <- pdSub$PDG > 0.05
datbig <- pdSub %>% filter(keepme2 == TRUE)
datsmall <- pdSub %>% filter(keepme2 == FALSE)
# detection rated adjusted
tmp = pdSub$RPM * (median(pdSub$PDG) / (pdSub$PDG))
rpm.corr <- sweep(esetSub, 2, tmp, FUN = "/")
lrpmcorrPatel <- log2(rpm.corr + 1)
out.raw = apply(lrpm, 2, function(x) median(x[x>=1]))
out.corr = apply(lrpmcorrPatel, 2, function(x) median(x[x>=1]))
dat = data.frame("PDG" = pdSub$PDG, "CPM" = out.raw, "CPM.corr" = out.corr)
dat1 = data.frame("PDG" = rep(pdSub$PDG, 2), "keepme2" = rep(pdSub$keepme2, 2), melt(dat[,-1]))
colnames(dat1)[3] <- "Normalization"
levels(dat1$Normalization) <- c("Counts per million", "Detection rate adjusted CPM")
dataCPM = dat1 %>% filter(Normalization == "Counts per million")
databig = dat1 %>% filter(Normalization == "Detection rate adjusted CPM", keepme2 == TRUE)
datasmall = dat1 %>% filter(Normalization == "Detection rate adjusted CPM", keepme2 == FALSE)
medExprs.Fig7A <- ggplot(dat1, aes(x = PDG, y = value, colour = Normalization)) +
geom_point(size = 2) +
geom_point(data = datasmall, aes(x = PDG, y = value), color = "gray") +
stat_smooth(data = dataCPM, se = FALSE, degree = 1, span = 1, size = 2, family="symmetric") +
stat_smooth(data = databig, se = FALSE, degree = 1, span = 1, size = 2, family="symmetric") +
xlab("Proportion of detected genes") +
ylab("Median expression (log2)") +
labs(title = "Patel et al., 2014") +
theme(legend.position=c(0.5, 0.8),
plot.title = element_text(size =20),
legend.text = element_text(size = 20),
legend.title = element_text(size = 20),
axis.text = element_text(size = 20),
axis.title = element_text(size =20),
legend.position = "none")
### Supp Fig 8 Patel (row 1)
dat <- tidyQuants(lcounts, PDGval = pdSub$PDG, qRange = c(0.01, 0.25, 0.5, 0.75, 0.99))
p.Patel.lcounts <- ggplot(dat, aes(x = PDG, y = value, colour = Quantile)) +
stat_smooth(se = FALSE, span = 0.5, degree = 1, size = 2) + geom_point(size = 2) +
xlab("Proportion of detected genes") + ylim(0, 16.1) +
ylab("Expression (log2)") +
labs(title = "Patel et al., 2014 (raw counts)")
dat <- tidyQuants(lrpm, PDGval = pdSub$PDG, qRange = c(0.01, 0.25, 0.5, 0.75, 0.99)) #
p.Patel.lrpm <- ggplot(dat, aes(x = PDG, y = value, colour = Quantile)) +
stat_smooth(se = FALSE, span = 0.5, degree = 1, size = 2) + geom_point(size = 2) +
xlab("Proportion of detected genes") + ylim(0, 16.1) +
ylab("Expression (log2)") +
labs(title = "Patel et al., 2014 (CPM)")
datkeep <- tidyQuants(lrpmcorrPatel[, pdSub$keepme2], PDGval = pdSub$PDG[pdSub$keepme2],
qRange = c(0.01, 0.25, 0.5, 0.75, 0.99))
p.Patel.lrpm.corr <- ggplot(data = datkeep, aes(x = PDG, y = value)) +
geom_point(data = datkeep, aes(colour = Quantile), size = 2) +
# geom_point(data = datnokeep, aes(x = PDG, y = value), color = "gray") +
stat_smooth(method = "loess", data = datkeep, aes(color = Quantile), se = FALSE, span = 0.5,
degree = 1, size = 2, family = "symmetric") +
xlab("Proportion of detected genes") + ylim(0, 16.1) +
ylab("Expression (log2)") +
labs(title = "Patel et al., 2014 (Detection rate adjusted CPM)")
# Figure 7B: Trapnell et al., 2014
library(scRNASeqHumanTrapnellMyoblast)
data("myoblastHumanCount")
eset <- assay(myoblastHumanCount)
pd <- as.data.frame(colData(myoblastHumanCount))
# # remove control cells, debris cells, and wells with more than 1 cell
pd <- pd[pd$characteristics_ch1.3 == "control well: FALSE" &
pd$characteristics_ch1.2 == "debris: FALSE" &
pd$characteristics_ch1.4 == "cells in well: 1", ]
eset <- eset[, match(rownames(pd), colnames(eset))]
# calculate PDG
pd$PDG <- 1 - (apply(eset, 2, function(x){ length(which(x == 0)) }) / nrow(eset))
pd$RPM <- colSums(eset) / 1e6
esetSub <- eset[,order(pd$PDG)]
pdSub <- pd[order(pd$PDG), ]
# raw
lcounts <- log2(esetSub + 1)
# adjusting for library size
tmp = pdSub$RPM
rpm <- sweep(esetSub, 2, tmp, FUN = "/")
lrpm <- log2(rpm + 1)
pdSub$keepme2 <- pdSub$PDG > 0.05
datbig <- pdSub %>% filter(keepme2 == TRUE)
datsmall <- pdSub %>% filter(keepme2 == FALSE)
# detection rated adjusted
tmp = pdSub$RPM * (median(pdSub$PDG) / (pdSub$PDG))
rpm.corr <- sweep(esetSub, 2, tmp, FUN = "/")
lrpmcorrTrapnell <- log2(rpm.corr + 1)
out.raw = apply(lrpm, 2, function(x) median(x[x>=1]))
out.corr = apply(lrpmcorrTrapnell, 2, function(x) median(x[x>=1]))
dat = data.frame("PDG" = pdSub$PDG, "CPM" = out.raw, "CPM.corr" = out.corr)
dat1 = data.frame("PDG" = rep(pdSub$PDG, 2), "keepme2" = rep(pdSub$keepme2, 2), melt(dat[,-1]))
colnames(dat1)[3] <- "Normalization"
levels(dat1$Normalization) <- c("Counts per million", "Detection rate adjusted CPM")
dataCPM = dat1 %>% filter(Normalization == "Counts per million")
databig = dat1 %>% filter(Normalization == "Detection rate adjusted CPM", keepme2 == TRUE)
datasmall = dat1 %>% filter(Normalization == "Detection rate adjusted CPM", keepme2 == FALSE)
medExprs.Fig7B <- ggplot(data = dat1, aes(x = PDG, y = value, colour = Normalization)) +
geom_point(size = 2) +
geom_point(data = datasmall, aes(x = PDG, y = value), color = "gray") +
stat_smooth(data = dataCPM, se = FALSE, degree = 1, span = 1, size = 2) +
stat_smooth(data = databig, se = FALSE, degree = 1, span = 1, size = 2) +
xlab("Proportion of detected genes") +
ylab("Median expression (log2)") +
labs(title = "Trapnell et al., 2014") +
theme(legend.position=c(0.5, 0.8),
plot.title = element_text(size =20),
legend.text = element_text(size = 20),
legend.title = element_text(size = 20),
axis.text = element_text(size = 20),
axis.title = element_text(size =20),
legend.position = "none")
### Supp Fig 8 Trapnell (row 2)
dat <- tidyQuants(lcounts, PDGval = pdSub$PDG, qRange = c(0.01, 0.25, 0.5, 0.75, 0.99))
p.Trapnell.lcounts <- ggplot(dat, aes(x = PDG, y = value, colour = Quantile)) +
stat_smooth(se = FALSE, span = 1, degree = 1, size = 2) + geom_point(size = 2) +
xlab("Proportion of detected genes") + ylim(0, 16.1) +
ylab("Expression (log2)") +
labs(title = "Trapnell et al., 2014 (raw counts)")
dat <- tidyQuants(lrpm, PDGval = pdSub$PDG, qRange = c(0.01, 0.25, 0.5, 0.75, 0.99))
p.Trapnell.lrpm <- ggplot(dat, aes(x = PDG, y = value, colour = Quantile)) +
stat_smooth(se = FALSE, span = 1 , degree = 1, size = 2) + geom_point(size = 2) +
xlab("Proportion of detected genes") + ylim(0, 16.1) +
ylab("Expression (log2)") +
labs(title = "Trapnell et al., 2014 (CPM)")
datkeep <- tidyQuants(lrpmcorrTrapnell[, pdSub$keepme2], PDGval = pdSub$PDG[pdSub$keepme2],
qRange = c(0.01, 0.25, 0.5, 0.75, 0.99))
datnokeep <- tidyQuants(lrpmcorrTrapnell[, !pdSub$keepme2], PDGval = pdSub$PDG[!pdSub$keepme2],
qRange = c(0.01, 0.25, 0.5, 0.75, 0.99))
p.Trapnell.lrpm.corr <- ggplot(data = datkeep, aes(x = PDG, y = value)) +
geom_point(data = datkeep, aes(colour = Quantile), size = 2) +
geom_point(data = datnokeep, aes(x = PDG, y = value), color = "gray") +
stat_smooth(method = "loess", data = datkeep, aes(color = Quantile), se = FALSE, span = 1,
degree = 1, size = 2) +
xlab("Proportion of detected genes") + ylim(0, 16.1) +
ylab("Expression (log2)") +
labs(title = "Trapnell et al., 2014 (Detection rate adjusted CPM)")
# Figure 7C: Treutlein et al., 2014
library(scRNASeqMouseTreutleinLineage)
data("lungMouseCount")
eset <- assay(lungMouseCount)
pd <- as.data.frame(colData(lungMouseCount))
pd$PDG <- 1 - (apply(eset, 2, function(x){ length(which(x == 0)) }) / nrow(eset))
pd$RPM <- colSums(eset) / 1e6
levels(pd$characteristics_ch1.2) <- list("ED14.5" = "age: Embryonic day 14.5",
"ED16.5" = "age: Embryonic day 16.5",
"ED18.5" ="age: Embryonic day 18.5", "PND107" = "age: post natal day 107")
pd$day <- pd$characteristics_ch1.2
pd$sampleType <- factor(ifelse(grepl("bulk", pd$description), "bulk",
ifelse(grepl("no cell control", pd$description),
"nocell", "SC")), levels=c("SC", "bulk", "nocell"))
eset <- eset[, pd$sampleType == "SC"]
pd <- pd[pd$sampleType == "SC", ]
esetSub <- eset[,order(pd$PDG)]
pdSub <- pd[order(pd$PDG), ]
# raw
lcounts <- log2(esetSub + 1)
# adjusting for library size
tmp = pdSub$RPM
rpm <- sweep(esetSub, 2, tmp, FUN = "/")
lrpm <- log2(rpm + 1)
pdSub$keepme2 <- pdSub$PDG > 0.05
datbig <- pdSub %>% filter(keepme2 == TRUE)
datsmall <- pdSub %>% filter(keepme2 == FALSE)
# detection rated adjusted
tmp = pdSub$RPM * (median(pdSub$PDG) / (pdSub$PDG))# + (1-max(pdSub$PDG))))
rpm.corr <- sweep(esetSub, 2, tmp, FUN = "/")
lrpmcorrTreutlein <- log2(rpm.corr + 1)
out.raw = apply(lrpm, 2, function(x) median(x[x>=1]))
out.corr = apply(lrpmcorrTreutlein, 2, function(x) median(x[x>=1]))
dat = data.frame("PDG" = pdSub$PDG, "CPM" = out.raw, "CPM.corr" = out.corr)
dat1 = data.frame("PDG" = rep(pdSub$PDG, 2), "keepme2" = rep(pdSub$keepme2, 2), melt(dat[,-1]))
colnames(dat1)[3] <- "Normalization"
levels(dat1$Normalization) <- c("Counts per million", "Detection rate adjusted CPM")
dataCPM = dat1 %>% filter(Normalization == "Counts per million")
databig = dat1 %>% filter(Normalization == "Detection rate adjusted CPM", keepme2 == TRUE)
datasmall = dat1 %>% filter(Normalization == "Detection rate adjusted CPM", keepme2 == FALSE)
medExprs.Fig7C <- ggplot(data = dat1, aes(x = PDG, y = value, colour = Normalization)) +
geom_point(size = 2) +
geom_point(data = datasmall, aes(x = PDG, y = value), color = "gray") +
stat_smooth(data = dataCPM, se = FALSE, degree = 1, span = 1, size = 2) +
stat_smooth(data = databig, se = FALSE, degree = 1, span = 1, size = 2) +
xlab("Proportion of detected genes") +
ylab("Median expression (log2)") +
labs(title = "Treutlien et al., 2014") +
theme(legend.position=c(0.5, 0.8),
plot.title = element_text(size =20),
legend.text = element_text(size = 20),
legend.title = element_text(size = 20),
axis.text = element_text(size = 20),
axis.title = element_text(size =20),
legend.position = "none")
### Supp Fig 8 Treutlein (row 3)
dat <- tidyQuants(lcounts, PDGval = pdSub$PDG, qRange = c(0.01, 0.25, 0.5, 0.75, 0.99))
p.Treutlien.lcounts <- ggplot(dat, aes(x = PDG, y = value, colour = Quantile)) +
stat_smooth(se = FALSE, span = 1, degree = 1, size = 2) + geom_point(size = 2) +
xlab("Proportion of detected genes") + ylim(0, 16.1) +
ylab("Expression (log2)") +
labs(title = "Treutlien et al., 2014 (raw counts)")
dat <- tidyQuants(lrpm, PDGval = pdSub$PDG, qRange = c(0.01, 0.25, 0.5, 0.75, 0.99))
p.Treutlien.lrpm <- ggplot(dat, aes(x = PDG, y = value, colour = Quantile)) +
stat_smooth(se = FALSE, span = 1, degree = 1, size = 2) + geom_point(size = 2) +
xlab("Proportion of detected genes") + ylim(0, 16.1) +
ylab("Expression (log2)") +
labs(title = "Treutlien et al., 2014 (CPM)")
datkeep <- tidyQuants(lrpmcorrTreutlein[, pdSub$keepme2], PDGval = pdSub$PDG[pdSub$keepme2],
qRange = c(0.01, 0.25, 0.5, 0.75, 0.99))
datnokeep <- tidyQuants(lrpmcorrTreutlein[, !pdSub$keepme2], PDGval = pdSub$PDG[!pdSub$keepme2],
qRange = c(0.01, 0.25, 0.5, 0.75, 0.99))
p.Treutlien.lrpm.corr <- ggplot(data = datkeep, aes(x = PDG, y = value)) +
geom_point(data = datkeep, aes(colour = Quantile), size = 2) +
geom_point(data = datnokeep, aes(x = PDG, y = value), color = "gray") +
stat_smooth(method = "loess", data = datkeep, aes(color = Quantile), se = FALSE, span = 1,
degree = 1, size = 2) +
xlab("Proportion of detected genes") + ylim(0, 16.1) +
ylab("Expression (log2)") +
labs(title = "Treutlien et al., 2014 (Detection rate adjusted CPM)")
# Figure 7D: Deng et al., 2014
library(scRNASeqMouseDengMonoAllelic)
data("embryoMouseCount")
eset <- assay(embryoMouseCount)
pd <- as.data.frame(colData(embryoMouseCount))
# remove liver and fibroblast cells
keepCells <- !grepl("Liver", pd$source_name_ch1) &
!grepl("fibroblast", pd$source_name_ch1) &
!grepl("strain: C57BL/6J", pd$characteristics_ch1)
eset <- eset[, keepCells]
pd <- pd[keepCells, ]
pd$PDG <- 1 - (apply(eset, 2, function(x){ length(which(x == 0)) }) / nrow(eset))
pd$RPM <- colSums(eset) / 1e6
esetSub <- eset[,order(pd$PDG)]
pdSub <- pd[order(pd$PDG), ]
# raw
lcounts <- log2(esetSub + 1)
# adjusting for library size
tmp = pdSub$RPM
rpm <- sweep(esetSub, 2, tmp, FUN = "/")
lrpm <- log2(rpm + 1)
pdSub$keepme2 <- pdSub$PDG > 0.05
datbig <- pdSub %>% filter(keepme2 == TRUE)
datsmall <- pdSub %>% filter(keepme2 == FALSE)
# detection rated adjusted
tmp = pdSub$RPM * (median(pdSub$PDG) / (pdSub$PDG))
rpm.corr <- sweep(esetSub, 2, tmp, FUN = "/")
lrpmcorrDeng <- log2(rpm.corr + 1)
out.raw = apply(lrpm, 2, function(x) median(x[x>=1]))
out.corr = apply(lrpmcorrDeng, 2, function(x) median(x[x>=1]))
dat = data.frame("PDG" = pdSub$PDG, "CPM" = out.raw, "CPM.corr" = out.corr)
dat1 = data.frame("PDG" = rep(pdSub$PDG, 2), "keepme2" = rep(pdSub$keepme2, 2), melt(dat[,-1]))
colnames(dat1)[3] <- "Normalization"
levels(dat1$Normalization) <- c("Counts per million", "Detection rate adjusted CPM")
dataCPM = dat1 %>% filter(Normalization == "Counts per million")
databig = dat1 %>% filter(Normalization == "Detection rate adjusted CPM", keepme2 == TRUE)
datasmall = dat1 %>% filter(Normalization == "Detection rate adjusted CPM", keepme2 == FALSE)
medExprs.Fig7E <- ggplot(data = dat1, aes(x = PDG, y = value, colour = Normalization)) +
geom_point(size = 2) +
geom_point(data = datasmall, aes(x = PDG, y = value), color = "gray") +
stat_smooth(data = dataCPM, se = FALSE, degree = 1, span = 1, size = 2) +
stat_smooth(data = databig, se = FALSE, degree = 1, span = 1, size = 2) +
xlab("Proportion of detected genes") +
ylab("Median expression (log2)") +
labs(title = "Deng et al., 2014") +
theme(legend.position=c(0.5, 0.2),
plot.title = element_text(size =20),
legend.text = element_text(size = 20),
legend.title = element_text(size = 20),
axis.text = element_text(size = 20),
axis.title = element_text(size =20),
legend.position = "none")
### Supp Fig 8 Deng (row 4)
dat <- tidyQuants(lcounts, PDGval = pdSub$PDG, qRange = c(0.01, 0.25, 0.5, 0.75, 0.99))
p.Deng.lcounts <- ggplot(dat, aes(x = PDG, y = value, colour = Quantile)) +
stat_smooth(se = FALSE, span = 1, degree = 1, size = 2) + geom_point(size = 2) +
xlab("Proportion of detected genes") + ylim(0, 16.1) +
ylab("Expression (log2)") +
labs(title = "Deng et al., 2014 (raw counts)")
dat <- tidyQuants(lrpm, PDGval = pdSub$PDG, qRange = c(0.01, 0.25, 0.5, 0.75, 0.99))
p.Deng.lrpm <- ggplot(dat, aes(x = PDG, y = value, colour = Quantile)) +
stat_smooth(se = FALSE, span = 1, degree = 1, size = 2) + geom_point(size = 2) +
xlab("Proportion of detected genes") + ylim(0, 16.1) +
ylab("Expression (log2)") +
labs(title = "Deng et al., 2014 (CPM)")
datkeep <- tidyQuants(lrpmcorrDeng[, pdSub$keepme2], PDGval = pdSub$PDG[pdSub$keepme2],
qRange = c(0.01, 0.25, 0.5, 0.75, 0.99))
p.Deng.lrpm.corr <- ggplot(data = datkeep, aes(x = PDG, y = value)) +
geom_point(data = datkeep, aes(colour = Quantile), size = 2) +
# geom_point(data = datnokeep, aes(x = PDG, y = value), color = "gray") +
stat_smooth(method = "loess", data = datkeep, aes(color = Quantile), se = FALSE, span = 1,
degree = 1, size = 2) +
xlab("Proportion of detected genes") + ylim(0, 16.1) +
ylab("Expression (log2)") +
labs(title = "Deng et al., 2014 (Detection rate adjusted CPM)")
##### Figure 4E: Shalek et al., 2014
library(scRNASeqMouseShalekDendritic)
data("dendriticMouseCount")
eset <- assay(dendriticMouseCount)
pd <- as.data.frame(colData(dendriticMouseCount))
eset <- eset[, pd$rep == FALSE & pd$onChip == FALSE ]
pd <- pd[pd$rep == FALSE & pd$onChip == FALSE, ]
eset <- eset[, match( colnames(eShalek), rownames(pd) )]
pd <- pd[match( colnames(eShalek), rownames(pd) ), ]
pd$RPM <- colSums(eset)/1e6
pd$PDG <- 1 - (apply(eset, 2, function(x){ length(which(x == 0)) }) / nrow(eset))
esetSub <- eset[,order(pd$PDG)]
pdSub <- pd[order(pd$PDG), ]
# raw
lcounts <- log2(esetSub + 1)
# adjusting for library size
tmp = pdSub$RPM
rpm <- sweep(esetSub, 2, tmp, FUN = "/")
lrpm <- log2(rpm + 1)
pdSub$keepme2 <- pdSub$PDG > 0.05
datbig <- pdSub %>% filter(keepme2 == TRUE)
datsmall <- pdSub %>% filter(keepme2 == FALSE)
# detection rated adjusted
tmp = pdSub$RPM * (median(pdSub$PDG) / (pdSub$PDG))# + (1-max(pdSub$PDG))))
rpm.corr <- sweep(esetSub, 2, tmp, FUN = "/")
lrpmcorrShalek <- log2(rpm.corr + 1)
out.raw = apply(lrpm, 2, function(x) median(x[x>=1]))
out.corr = apply(lrpmcorrShalek, 2, function(x) median(x[x>=1]))
dat = data.frame("PDG" = pdSub$PDG, "CPM" = out.raw, "CPM.corr" = out.corr)
dat1 = data.frame("PDG" = rep(pdSub$PDG, 2), "keepme2" = rep(pdSub$keepme2, 2), melt(dat[,-1]))
colnames(dat1)[3] <- "Normalization"
levels(dat1$Normalization) <- c("Counts per million", "Detection rate adjusted CPM")
dataCPM = dat1 %>% filter(Normalization == "Counts per million")
databig = dat1 %>% filter(Normalization == "Detection rate adjusted CPM", keepme2 == TRUE)
datasmall = dat1 %>% filter(Normalization == "Detection rate adjusted CPM", keepme2 == FALSE)
medExprs.Fig7D <- ggplot(data = dat1, aes(x = PDG, y = value, colour = Normalization)) +
geom_point(size = 2) +
geom_point(data = datasmall, aes(x = PDG, y = value), color = "gray") +
stat_smooth(data = dataCPM, se = FALSE, degree = 1, span = 1, size = 2) +
stat_smooth(data = databig, se = FALSE, degree = 1, span = 1, size = 2) +
xlab("Proportion of detected genes") +
ylab("Median expression (log2)") +
labs(title = "Shalek et al., 2014") +
theme(legend.position=c(0.5, 0.8),
plot.title = element_text(size =20),
legend.text = element_text(size = 20),
legend.title = element_text(size = 20),
axis.text = element_text(size = 20),
axis.title = element_text(size =20),
legend.position = "none")
### Supp Fig 8 Shalek (row 5)
dat <- tidyQuants(lcounts, PDGval = pdSub$PDG, qRange = c(0.01, 0.25, 0.5, 0.75, 0.99))
p.Shalek.lcounts <- ggplot(dat, aes(x = PDG, y = value, colour = Quantile)) +
stat_smooth(se = FALSE, span = 1, degree = 1, size = 2) + geom_point(size = 2) +
xlab("Proportion of detected genes") + ylim(0, 16.1) +
ylab("Expression (log2)") +
labs(title = "Shalek et al., 2014 (raw counts)")
dat <- tidyQuants(lrpm, PDGval = pdSub$PDG, qRange = c(0.01, 0.25, 0.5, 0.75, 0.99))
p.Shalek.lrpm <- ggplot(dat, aes(x = PDG, y = value, colour = Quantile)) +
stat_smooth(se = FALSE, span = 1, degree = 1, size = 2) + geom_point(size = 2) +
xlab("Proportion of detected genes") + ylim(0, 16.1) +
ylab("Expression (log2)") +
labs(title = "Shalek et al., 2014 (CPM)")
datkeep <- tidyQuants(lrpmcorrShalek[, pdSub$keepme2], PDGval = pdSub$PDG[pdSub$keepme2],
qRange = c(0.01, 0.25, 0.5, 0.75, 0.99))
datnokeep <- tidyQuants(lrpmcorrShalek[, !pdSub$keepme2], PDGval = pdSub$PDG[!pdSub$keepme2],
qRange = c(0.01, 0.25, 0.5, 0.75, 0.99))
p.Shalek.lrpm.corr <- ggplot(data = datkeep, aes(x = PDG, y = value)) +
geom_point(data = datkeep, aes(colour = Quantile), size = 2) +
geom_point(data = datnokeep, aes(x = PDG, y = value), color = "gray") +
stat_smooth(method = "loess", data = datkeep, aes(color = Quantile), se = FALSE, span = 1,
degree = 1, size = 2) +
xlab("Proportion of detected genes") + ylim(0, 16.1) +
ylab("Expression (log2)") +
labs(title = "Shalek et al., 2014 (Detection rate adjusted CPM)")
p1 <- plot_grid(medExprs.Fig7A, medExprs.Fig7B, medExprs.Fig7D,
medExprs.Fig7C, medExprs.Fig7E,
ncol =2 , labels = c("A", "B", "C", "D", "E"),
label_size = 25)
p1
# pdf(paste0(working_path, "scBatchSuppFig-7.pdf"), width = 12, height = 16)
# print(p1)
# dev.off()
p2 <- plot_grid(p.Patel.lcounts, p.Patel.lrpm, p.Patel.lrpm.corr,
p.Trapnell.lcounts, p.Trapnell.lrpm, p.Trapnell.lrpm.corr,
p.Shalek.lcounts, p.Shalek.lrpm, p.Shalek.lrpm.corr,
p.Treutlien.lcounts, p.Treutlien.lrpm, p.Treutlien.lrpm.corr,
p.Deng.lcounts, p.Deng.lrpm, p.Deng.lrpm.corr,
ncol = 3)
p2
# pdf(paste0(working_path, "scBatchSuppFig-8.pdf"), width = 25, height = 27)
# print(p1)
# dev.off()
##### Figure 9A Patel
library(scRNASeqHumanPatelGlioblastoma)
data("glioHumanCount")
eset <- assay(glioHumanCount)
pd <- as.data.frame(colData(glioHumanCount))
pd$tumorName <- as.factor(as.character(pd$tumorName))
pd$RPM <- colSums(eset)/1e6
pd$PDG <- 1 - (apply(eset, 2, function(x){ length(which(x == 0)) }) / nrow(eset))
subIDs <- (pd$sampleType == "SC" & pd$includeSample == TRUE)
pdPatelCounts <- pd[subIDs,]
esetSubPatel <- eset[, subIDs]
# raw
lcountsPatel <- log2(esetSubPatel + 1)
# adjusting for library size
tmp = pdPatelCounts$RPM
rpm <- sweep(esetSubPatel, 2, tmp, FUN = "/")
lrpmPatel <- log2(rpm + 1)
eSub <- lrpmPatel[, pdPatelCounts$PDG > 0.05]
pdSub <- pdPatelCounts[pdPatelCounts$PDG > 0.05, ]
keepIndex <- rowSums(eSub > 0) >= ncol(eSub); sum(keepIndex)
## [1] 56
dat <- sweep(eSub[keepIndex, ], 1, rowMeans(eSub[keepIndex, ]), FUN = "-")
s <- svd(dat)
scores <- data.frame(pdSub, s$v[, 1:5], "Batch" = factor(paste0(pdSub$runID, pdSub$fcLane)))
pc1Var <- c(s$d^2 / sum(s$d^2))[1]
PC1.Patel <- ggplot(scores, aes(x = PDG, y = X1)) +
geom_point(size = 2, color = "blue") +
ylab(paste0("Principal Component 1 (", round(pc1Var,2)*100, "%)")) +
xlab("Proportion of detected genes") +
labs(title = paste0("Patel et al., 2014 (corr: ",
round(cor(scores$X1, scores$PDG), 2), ")")) +
theme(plot.title = element_text(size = 20),
axis.text = element_text(size = 20),
axis.title = element_text(size = 20))
##### Figure 9B Trapnell
library(scRNASeqHumanTrapnellMyoblast)
data("myoblastHumanCount")
eset <- assay(myoblastHumanCount)
pd <- as.data.frame(colData(myoblastHumanCount))
# # remove control cells, debris cells, and wells with more than 1 cell
pd <- pd[pd$characteristics_ch1.3 == "control well: FALSE" &
pd$characteristics_ch1.2 == "debris: FALSE" &
pd$characteristics_ch1.4 == "cells in well: 1", ]
eset <- eset[, match(rownames(pd), colnames(eset))]
# calculate PDG
pd$PDG <- 1 - (apply(eset, 2, function(x){ length(which(x == 0)) }) / nrow(eset))
pd$RPM <- colSums(eset) / 1e6
esetSubTrapnell <- eset
pdTrapnellCounts <- pd
# adjusting for library size
tmp = pdTrapnellCounts$RPM
rpm <- sweep(esetSubTrapnell, 2, tmp, FUN = "/")
lrpmTrapnell <- log2(rpm + 1)
eSub <- lrpmTrapnell[, pdTrapnellCounts$PDG > 0.05]
pdSub <- pdTrapnellCounts[pdTrapnellCounts$PDG > 0.05, ]
keepIndex <- rowSums(eSub > 0) >= ncol(eSub); sum(keepIndex)
## [1] 369
dat <- sweep(eSub[keepIndex, ], 1, rowMeans(eSub[keepIndex, ]), FUN = "-")
s <- svd(dat)
scores <- data.frame(pdSub, s$v[, 1:5], "Batch" = factor(paste0(pdSub$runID, pdSub$fcLane)))
pc1Var <- c(s$d^2 / sum(s$d^2))[1]
PC1.Trapnell <- ggplot(scores, aes(x = PDG, y = X1)) +
geom_point(size = 2, color = "blue") +
ylab(paste0("Principal Component 1 (", round(pc1Var,2)*100, "%)")) +
xlab("Proportion of detected genes") +
labs(title = paste0("Trapnell et al., 2014 (corr: ",
round(cor(scores$X1, scores$PDG), 2), ")")) +
theme(plot.title = element_text(size = 20),
axis.text = element_text(size = 20),
axis.title = element_text(size = 20))
##### Figure 9C
library(scRNASeqMouseShalekDendritic)
data("dendriticMouseCount")
eset <- assay(dendriticMouseCount)
pd <- as.data.frame(colData(dendriticMouseCount))
eset <- eset[, pd$rep == FALSE & pd$onChip == FALSE ]
pd <- pd[pd$rep == FALSE & pd$onChip == FALSE, ]
eset <- eset[, match( colnames(eShalek), rownames(pd))]
pd <- pd[match( colnames(eShalek), rownames(pd)), ]
pd$RPM <- colSums(eset)/1e6
pd$PDG <- 1 - (apply(eset, 2, function(x){ length(which(x == 0)) }) / nrow(eset))
esetSubShalek <- eset
pdShalekCounts <- pd
# adjusting for library size
tmp = pdShalekCounts$RPM
rpm <- sweep(esetSubShalek, 2, tmp, FUN = "/")
lrpmShalek <- log2(rpm + 1)
eSub <- lrpmShalek[, pdShalekCounts$PDG > 0.05]
pdSub <- pdShalekCounts[pdShalekCounts$PDG > 0.05, ]
keepIndex <- rowSums(eSub > 0) >= ncol(eSub); sum(keepIndex)
## [1] 136
dat <- sweep(eSub[keepIndex, ], 1, rowMeans(eSub[keepIndex, ]), FUN = "-")
s <- svd(dat)
scores <- data.frame(pdSub, s$v[, 1:5], "Batch" = factor(paste0(pdSub$runID, pdSub$fcLane)))
pc1Var <- c(s$d^2 / sum(s$d^2))[1]
scores$X1 <- scores$X1*(-1)
PC1.Shalek <- ggplot(scores, aes(x = PDG, y = X1)) +
geom_point(size = 2, color = "blue") +
ylab(paste0("Principal Component 1 (", round(pc1Var,2)*100, "%)")) +
xlab("Proportion of detected genes") +
labs(title = paste0("Shalek et al., 2014 (corr: ",
round(cor(scores$X1, scores$PDG), 2), ")")) +
theme(plot.title = element_text(size = 20),
axis.text = element_text(size = 20),
axis.title = element_text(size = 20))
##### Figure 9D
library(scRNASeqMouseTreutleinLineage)
data("lungMouseCount")
eset <- assay(lungMouseCount)
pd <- as.data.frame(colData(lungMouseCount))
pd$PDG <- 1 - (apply(eset, 2, function(x){ length(which(x == 0)) }) / nrow(eset))
pd$RPM <- colSums(eset) / 1e6
levels(pd$characteristics_ch1.2) <- list("ED14.5" = "age: Embryonic day 14.5",
"ED16.5" = "age: Embryonic day 16.5",
"ED18.5" ="age: Embryonic day 18.5", "PND107" = "age: post natal day 107")
pd$day <- pd$characteristics_ch1.2
pd$sampleType <- factor(ifelse(grepl("bulk", pd$description), "bulk",
ifelse(grepl("no cell control", pd$description),
"nocell", "SC")), levels=c("SC", "bulk", "nocell"))
eset <- eset[, pd$sampleType == "SC"]
pd <- pd[pd$sampleType == "SC", ]
esetSubTreutlein <- eset
pdTreutleinCounts <- pd
# adjusting for library size
tmp = pdTreutleinCounts$RPM
rpm <- sweep(esetSubTreutlein, 2, tmp, FUN = "/")
lrpmTreutlein <- log2(rpm + 1)
eSub <- lrpmTreutlein[, pdTreutleinCounts$PDG > 0.05]
pdSub <- pdTreutleinCounts[pdTreutleinCounts$PDG > 0.05, ]
keepIndex <- rowSums(eSub > 0) >= ncol(eSub); sum(keepIndex)
## [1] 103
dat <- sweep(eSub[keepIndex, ], 1, rowMeans(eSub[keepIndex, ]), FUN = "-")
s <- svd(dat)
scores <- data.frame(pdSub, s$v[, 1:5])
pc1Var <- c(s$d^2 / sum(s$d^2))[1]
scores$X1 <- scores$X1*(-1)
PC1.Treutlein <- ggplot(scores, aes(x = PDG, y = X1)) +
geom_point(size = 2, color = "blue") +
ylab(paste0("Principal Component 1 (", round(pc1Var,2)*100, "%)")) +
xlab("Proportion of detected genes") +
labs(title = paste0("Treutlein et al., 2014 (corr: ",
round(cor(scores$X1, scores$PDG), 2), ")")) +
theme(plot.title = element_text(size = 20),
axis.text = element_text(size = 20),
axis.title = element_text(size = 20))
##### Figure 9 Deng group C and Deng group D
library(scRNASeqMouseDengMonoAllelic)
data("embryoMouseCount")
eset <- assay(embryoMouseCount)
pd <- as.data.frame(colData(embryoMouseCount))
# remove liver and fibroblast cells
keepCells <- !grepl("Liver", pd$source_name_ch1) &
!grepl("fibroblast", pd$source_name_ch1) &
!grepl("strain: C57BL/6J", pd$characteristics_ch1)
eset <- eset[, keepCells]
pd <- pd[keepCells, ]
pd$PDG <- 1 - (apply(eset, 2, function(x){ length(which(x == 0)) }) / nrow(eset))
pd$RPM <- colSums(eset) / 1e6
esetSubDeng <- eset
pdDengCounts <- pd
# adjusting for library size
tmp = pdDengCounts$RPM
rpm <- sweep(esetSubDeng, 2, tmp, FUN = "/")
lrpmDeng <- log2(rpm + 1)
pdDengCounts$time <- pdDeng$time
eSub <- eDeng[, pdDeng$PDG > 0.05]
pdSub <- pdDeng[pdDeng$PDG > 0.05, ]
keepIndex <- rowSums(eSub > 0) >= ncol(eSub); sum(keepIndex)
## [1] 404
dat <- sweep(eSub[keepIndex, ], 1, rowMeans(eSub[keepIndex, ]), FUN = "-")
s <- svd(dat)
pcVar <- c(s$d^2 / sum(s$d^2))[1:3]
scores <- data.frame(pdDeng[pdDeng$PDG > 0.05, ], s$v[, 1:3])
scores$X1 <- scores$X1 * (-1)
PC1.Deng <- ggplot(scores, aes(x = PDG, y = X1)) +
geom_point(size = 2, color = "blue") +
ylab(paste0("Principal Component 1 (", round(pc1Var,2)*100, "%)")) +
xlab("Proportion of detected genes") +
labs(title = paste0("Deng et al., 2014 (corr: ",
round(cor(scores$X1, scores$PDG), 2), ")")) +
theme(plot.title = element_text(size = 20),
axis.text = element_text(size = 20),
axis.title = element_text(size = 20))
pout <- plot_grid(PC1.Patel, PC1.Trapnell, PC1.Shalek,
PC1.Treutlein, PC1.Deng, ncol = 2,
labels = LETTERS[1:5],
label_size = 25)
pout
# pdf(paste0(working_path, "scBatchSuppFig-9.pdf"), width = 12, height = 16)
# print(pout)
# dev.off()
sessionInfo()
## R version 3.2.0 (2015-04-16)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: OS X 10.10.5 (Yosemite)
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats4 parallel grid stats graphics grDevices datasets
## [8] utils methods base
##
## other attached packages:
## [1] scRNASeqHumanPatelGlioblastoma_0.1 scRNASeqMouseDengMonoAllelic_0.1
## [3] scRNASeqMouseTreutleinLineage_0.1 scRNASeqMouseShalekDendritic_0.1
## [5] scRNASeqHumanTrapnellMyoblast_0.1 dplyr_0.4.1
## [7] RColorBrewer_1.1-2 tidyr_0.2.0
## [9] reshape2_1.4.1 stringr_1.0.0
## [11] vcd_1.4-1 GenomicFeatures_1.21.13
## [13] AnnotationDbi_1.31.15 Biobase_2.29.1
## [15] GenomicRanges_1.21.15 GenomeInfoDb_1.5.7
## [17] IRanges_2.3.11 S4Vectors_0.7.3
## [19] BiocGenerics_0.15.2 cowplot_0.5.0
## [21] ggplot2_1.0.1 knitr_1.10.5
##
## loaded via a namespace (and not attached):
## [1] SummarizedExperiment_0.1.5 zoo_1.7-12
## [3] lattice_0.20-31 colorspace_1.2-6
## [5] htmltools_0.2.6 rtracklayer_1.29.7
## [7] yaml_2.1.13 XML_3.98-1.2
## [9] DBI_0.3.1 BiocParallel_1.3.23
## [11] lambda.r_1.1.7 plyr_1.8.3
## [13] zlibbioc_1.15.0 Biostrings_2.37.2
## [15] munsell_0.4.2 gtable_0.1.2
## [17] futile.logger_1.4.1 evaluate_0.7
## [19] labeling_0.3 biomaRt_2.25.1
## [21] lmtest_0.9-34 proto_0.3-10
## [23] Rcpp_0.11.6 scales_0.2.5
## [25] formatR_1.2 XVector_0.9.1
## [27] Rsamtools_1.21.8 digest_0.6.8
## [29] stringi_0.5-4 tools_3.2.0
## [31] bitops_1.0-6 magrittr_1.5
## [33] lazyeval_0.1.10 RCurl_1.95-4.6
## [35] RSQLite_1.0.0 futile.options_1.0.0
## [37] MASS_7.3-40 assertthat_0.1
## [39] rmarkdown_0.6.1 GenomicAlignments_1.5.9